library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(microbiome)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages
'%!in%' <- function(x,y)!('%in%'(x,y))
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:microbiome':
##
## alpha
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")
ps = "data/processed/physeq_update_23_11.RDS"
ps %>%
here::here() %>%
readRDS() %>%
phyloseq_get_strains_fast() %>%
phyloseq_remove_chloro_mitho() -> physeq
## Joining, by = "ASV"
physeq %>%
subset_samples(Experiment == "Continuous") %>%
subset_samples(Paul %!in% c("Paul")) %>%
subset_samples(Reactor != "IR2") -> ps_PolyFermS
We will be analysing only the PolyFermS samples here so take a subset of the physeq object.
physeq %>%
subset_samples(Experiment == "Continuous") %>%
subset_samples(Paul %!in% c("Paul")) %>%
subset_samples(Reactor != "IR2") -> ps_polyFermS
sample_data(ps_polyFermS)$Reactor <- fct_relevel(sample_data(ps_polyFermS)$Reactor, "IR1", "CR", "TR1", "TR2","TR3", "TR4", "TR5", "TR6")
sample_data(ps_polyFermS)$Treatment <- fct_relevel(sample_data(ps_polyFermS)$Treatment, "UNTREATED", "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN", "CCUG59168")
sample_data(ps_polyFermS)$Reactor_Treatment <- fct_relevel(sample_data(ps_polyFermS)$Reactor_Treatment, "IR1_UNTREATED","CR_UNTREATED", "CR_CTX", "CR_VAN", "TR1_CTX+HV292.1","TR2_CTX", "TR3_HV292.1", "TR5_VAN+CCUG59168", "TR4_VAN", "TR6_CCUG59168")
ps_polyFermS %>%
rarefy_even_depth(sample.size = 4576,
rngseed = 123) -> ps_polyFermS_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## 16 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## CR-40-S166ETR1-30-S178ETR1-42-S194ETR2-30-S195IR1-40-S197
## ...
## 50OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
Compute beta-div metrics:
ps_polyFermS_rare %>%
phyloseq_compute_bdiv(norm = "pc",
phylo = TRUE,
seed = 123) -> bdiv_list
## Loading required package: ape
## Loading required package: GUniFrac
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
##
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
##
## diversity
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
ps_polyFermS_rare %>%
subset_samples(Enrichment == "NotEnriched") %>%
phyloseq_plot_bdiv(bdiv_list,
m = "CoDa",
seed = 123) -> coda
coda$PCA$layers[[1]] = NULL
coda$PCA + geom_point(size=2,
aes(color = Reactor_Treatment,
fill = Reactor_Treatment,
shape = NULL,
alpha = Day_from_Inoculum)) +
theme_light() +
geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.15, "inches")),
size = 0.05, linetype = "dashed", inherit.aes = TRUE, aes(group=Reactor_Treatment, color = Reactor_Treatment), show.legend = FALSE) +
scale_alpha_continuous(range=c( 0.9, 0.3)) + scale_color_viridis_d(na.value = "black") +
scale_fill_viridis_d(na.value = "black") +
# scale_shape_manual(values = c(8, 21, 22, 23, 24, 16, 15, 18, 17)) +
theme_classic() +
labs(col=NULL, fill = NULL, shape = NULL) + guides(shape=FALSE) -> p1
p1
p1 %>%
ggplotly() -> p1ply
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
p1ply
htmlwidgets::saveWidget(as_widget(p1ply),
paste0(here::here(),
"/data/processed/",
"beta_",
format(Sys.time(), "%Y%b%d"),".html"))
Visualize distance ‘trajectories’ :
We will use the library use dist to get it in the dataframe we want for plotting useDist
ps = coda$physeq_clr
ps %>%
otu_table() %>%
vegan::vegdist(method = "eucl") -> mtest
mtest %>%
usedist::dist_groups(ps %>% get_variable("Reactor_Treatment")) -> dist_df
ps %>%
sample_data() %>%
data.frame() %>%
rownames_to_column('tmpid') %>%
dplyr::select(tmpid, Day_from_Inoculum) -> meta_df
left_join(
dist_df,
meta_df %>%
dplyr::rename("varGroup1" = Day_from_Inoculum),
by = c("Item1" = "tmpid")) %>%
left_join(
meta_df %>%
dplyr::rename("varGroup2" = Day_from_Inoculum),
by = c("Item2" = "tmpid")) -> test
Please carefully check if the code makes sense
test %>%
head()
## Item1 Item2 Group1 Group2 Label Distance
## 1 CR-1-S116 CR-10-S245 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 20.82583
## 2 CR-1-S116 CR-13-S148 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 21.99395
## 3 CR-1-S116 CR-14-S336 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 21.56607
## 4 CR-1-S116 CR-15-S292 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 22.35332
## 5 CR-1-S116 CR-16-S332 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 24.76417
## 6 CR-1-S116 CR-17-S97 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 29.12162
## varGroup1 varGroup2
## 1 24 33
## 2 24 36
## 3 24 37
## 4 24 38
## 5 24 39
## 6 24 40
test %>%
dplyr::filter(grepl("Within", Label)) %>%
dplyr::group_by(Label, Item1, Item2, varGroup1, varGroup2) %>%
dplyr::arrange(varGroup1, varGroup2) %>%
dplyr::mutate(firsto = dplyr::first(Distance)) %>%
dplyr::ungroup() %>%
dplyr::group_by(Label, Item1, varGroup1) %>%
dplyr::summarise(next_dist = first(firsto)) %>%
dplyr::arrange(varGroup1, Label) -> test_prev
## `summarise()` regrouping output by 'Label', 'Item1' (override with `.groups` argument)
test_prev %>%
DT::datatable()
Please carefully check if the code makes sense
test_prev %>%
ggplot(aes(x = as.factor(varGroup1) , y = next_dist)) +
geom_point(size=1, alpha=0.6, aes(colour = Label, group=Label),
position=position_jitterdodge(dodge.width=0.9)) +
geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.1, "inches")),
size = 0.4, linetype = "dashed", inherit.aes = TRUE, aes(group=Label, color = Label),
position=position_jitterdodge(dodge.width=0.9)) +
# geom_boxplot(aes(group = varGroup2 %>% as.factor()),
# fill = "transparent",
# outlier.colour = NA,alpha=0.4) +
facet_grid(Label ~ ., scales = "fixed") +
# ggrepel::geom_text_repel(cex=2,
# aes(label= Group1),
# segment.color = 'black',
# segment.size = 0.5,
# # nudge_x = -4,
# # nudge_y = 0,
# df_all_t0 %>% filter(Day2 == 6 & metric=="wunifrac") %>% unique()) +
theme_bw() + xlab("Day") + ylab("Distance to prev") +
geom_vline(xintercept = c(23,39),
color="black", alpha=0.4) -> tmp
print(tmp)
tmp + facet_null()
ref = 24
test %>%
dplyr::filter(grepl("Within", Label)) %>%
dplyr::group_by(Label) %>%
dplyr::arrange(varGroup1, varGroup2) %>%
# dplyr::mutate(firsto = dplyr::first(Distance)) %>%
dplyr::filter(varGroup1 == !!ref &
varGroup2 > !!ref) %>%
dplyr::arrange(varGroup2) -> test_ref
Please carefully check if the code/rationale makes sense
I run some checks for the distance to ref:
test_ref %>%
DT::datatable()
sample_to_check <- c("CR-1-S116", "CR-3-S98", "IR1-24-S295", "IR1-81-S298")
subset_samples(ps,
sample_names(ps) %in% sample_to_check) %>%
sample_data() %>%
data.frame() %>%
dplyr::select(Sample_description,Day_of_Connection, Day_of_Treatment, Day_from_Inoculum) %>%
DT::datatable()
mtest %>%
as.matrix() -> mtest_mat
mtest_mat[sample_to_check,sample_to_check]
## CR-1-S116 CR-3-S98 IR1-24-S295 IR1-81-S298
## CR-1-S116 0.00000 11.34670 15.47180 24.69989
## CR-3-S98 11.34670 0.00000 15.15914 24.46117
## IR1-24-S295 15.47180 15.15914 0.00000 24.52264
## IR1-81-S298 24.69989 24.46117 24.52264 0.00000
Please carefully check
test_ref %>%
ggplot(aes(x = as.factor(varGroup2) , y = Distance)) +
geom_point(size=1, alpha=0.6, aes(colour = Group1, group=Group1),
position=position_jitterdodge(dodge.width=0.9)) +
geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.1, "inches")),
size = 0.4, linetype = "dashed", inherit.aes = TRUE, aes(group=Group1, color = Group1),
position=position_jitterdodge(dodge.width=0.9)) +
# geom_boxplot(aes(group = varGroup2 %>% as.factor()),
# fill = "transparent",
# outlier.colour = NA,alpha=0.4) +
facet_grid(Label ~ ., scales = "fixed") +
# ggrepel::geom_text_repel(cex=2,
# aes(label= Group1),
# segment.color = 'black',
# segment.size = 0.5,
# # nudge_x = -4,
# # nudge_y = 0,
# df_all_t0 %>% filter(Day2 == 6 & metric=="wunifrac") %>% unique()) +
theme_bw() + xlab("Day") + ylab(paste0("Distance to ref :", ref)) +
geom_vline(xintercept = c(23,39),
color="black", alpha=0.4) -> tmp
print(tmp + theme(legend.position = "none") )
Not sure why there is two values for day 26. Not sure why the vertical line is 61 here since I used the same command as the previous plot…
tmp + facet_null()
Next step: ref (should be easy with the dplyr::filter(grepl(“Within”, Label)))
paste0(here::here(),
"/data/processed/",
"beta",
"_",
format(Sys.time(), "%Y%b%d")
,".RData") %>% save.image()
# load("/Users/fconstan/Projects/EZe/ASV/260420.RData")
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GUniFrac_1.1 matrixStats_0.57.0 vegan_2.5-7 lattice_0.20-41
## [5] permute_0.9-5 ape_5.4-1 reshape2_1.4.4 scales_1.1.1
## [9] microbiome_2.1.26 plotly_4.9.2.2 ampvis2_2.6.4 ggrepel_0.9.0
## [13] speedyseq_0.3.0 phyloseq_1.30.0 forcats_0.5.0 stringr_1.4.0
## [17] dplyr_1.0.2 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2
## [21] tibble_3.0.4 ggplot2_3.3.3 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-0 ellipsis_0.3.1
## [4] rprojroot_2.0.2 XVector_0.26.0 fs_1.5.0
## [7] rstudioapi_0.13 farver_2.0.3 DT_0.16
## [10] ggnet_0.1.0 fansi_0.4.1 lubridate_1.7.9.2
## [13] xml2_1.3.2 codetools_0.2-18 splines_3.6.3
## [16] doParallel_1.0.16 knitr_1.30 ade4_1.7-16
## [19] jsonlite_1.7.2 broom_0.7.3 cluster_2.1.0
## [22] dbplyr_2.0.0 compiler_3.6.3 httr_1.4.2
## [25] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-0
## [28] lazyeval_0.2.2 cli_2.2.0 htmltools_0.5.0
## [31] prettyunits_1.1.1 tools_3.6.3 igraph_1.2.6
## [34] gtable_0.3.0 glue_1.4.2 Rcpp_1.0.5
## [37] Biobase_2.46.0 cellranger_1.1.0 vctrs_0.3.6
## [40] Biostrings_2.54.0 zCompositions_1.3.4 multtest_2.42.0
## [43] nlme_3.1-151 crosstalk_1.1.0.1 iterators_1.0.13
## [46] xfun_0.19 network_1.16.1 rvest_0.3.6
## [49] lifecycle_0.2.0 zlibbioc_1.32.0 MASS_7.3-53
## [52] hms_0.5.3 parallel_3.6.3 biomformat_1.14.0
## [55] rhdf5_2.30.1 RColorBrewer_1.1-2 yaml_2.2.1
## [58] NADA_1.6-1.1 stringi_1.5.3 S4Vectors_0.24.4
## [61] foreach_1.5.1 BiocGenerics_0.32.0 truncnorm_1.0-8
## [64] rlang_0.4.10 pkgconfig_2.0.3 evaluate_0.14
## [67] Rhdf5lib_1.8.0 labeling_0.4.2 patchwork_1.1.1
## [70] htmlwidgets_1.5.3 tidyselect_1.1.0 here_1.0.1
## [73] plyr_1.8.6 magrittr_2.0.1 R6_2.5.0
## [76] IRanges_2.20.2 generics_0.1.0 DBI_1.1.0
## [79] pillar_1.4.7 haven_2.3.1 withr_2.3.0
## [82] mgcv_1.8-33 survival_3.2-7 modelr_0.1.8
## [85] crayon_1.3.4 rmarkdown_2.6 progress_1.2.2
## [88] grid_3.6.3 readxl_1.3.1 data.table_1.13.6
## [91] reprex_0.3.0 digest_0.6.27 usedist_0.4.0
## [94] stats4_3.6.3 munsell_0.5.0 viridisLite_0.3.0